

/*
README: 
1. THIS DO-FILE CREATES ALL THE CLEAN DATASETS USED IN THE ANALYSIS.
2. IT IS NOT NECESSARY TO RUN THIS DO-FILE TO REPRODUCE THE RESULTS IN THE PAPER. ALL NECESSARY DATASETS ARE PROVIDED IN "Data\Intermediate" folder.
3. IN ORDER TO RUN THIS DO-FILE, YOU MUST PROCURE THE NECESSARY RAW DATA (CENSUS, CASEN, iPUMS ETC.) FROM PRIMARY SOURCES.
*/

* Start from an empty session and switch off output paging so the file runs unattended.
clear all
set more off

	// Set path to main replication folder 
	// ("XXX" is a placeholder: replace it with the local replication root before running)
	global path "XXX"

	// Folder globals built from ${path}: raw inputs, IPUMS scratch folder, cleaned outputs
	global raw "${path}/Data/Raw"
	global temp "${path}/Data/Raw/IPUMS/Temp"
	global int "${path}/Data/Intermediate"
	
	** Codes to install
	** (user-written commands; uncomment on first use. kountry is called in the
	**  Freedom House step of this file, so it is listed here as well.)
	/* 
	ssc install scheme-modern, replace
	ssc install ftools, replace 
	ssc install reghdfe, replace 
	ssc install parmest, replace
	ssc install outreg2, replace
	ssc install winsor2, replace
	ssc install boottest, replace
	ssc install kountry, replace
	*/


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
///////////////////////////			CENSUS			////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

	***************
	//	i. Census 1992
	***************
{
	* Person-level extract of the 1992 census. Produces three outputs:
	*   - tempfile cohort_1992: cohort (age_21) means of the attainment dummies
	*   - Data_Figure_A9_1992.dta: wide cohort window (age_21 1961-1984)
	*   - censo1992.dta: main cohort window (age_21 1964-1981)
	use "${raw}/CENSUS/Personas Censo 1992.dta",clear

	rename Comuna_habitual_Origen3 comuna
	rename Comuna_Madre_Origen3 comuna_nac
	rename Situacion_empleo trabajo
	rename Ocupacion ocupacion
	rename Edad edad
	
	* Only use people born in Chile: keep records whose year of arrival is blank.
	* The variable is written out in full. The abbreviation "year" used to be
	* relied on here, which is fragile because a real variable called year is
	* created further down in this same block.
	rename A*o_llegada_pais year_arrival
	drop if year_arrival!=" "

	* Household ID
	egen hh_id=group(Portafolio Vivienda Hogar)

	* Female (missing when gender is neither 1 nor 2)
	rename Sexo	gender
	g       female = 1 if gender == 2
	replace female = 0 if gender == 1

	* Cohort: calendar year in which the person turned 21, centred on 1972;
	* a21_yr_post is the same trend switched on only after 1972
	g age_21 = 1992 - (edad - 21)
	g a21_yr = age_21-1972
	g a21_yr_post = a21_yr*(age_21>1972)
	label var a21_yr 		"Yr Age 21"
	label var a21_yr_post	"Yr Age 21 x Dictatorship"

	* Gender-specific versions of the two trends
	g a21_yr_f=a21_yr*female
	g a21_yr_post_f=a21_yr_post*female
	g a21_yr_m=a21_yr*(1-female)
	g a21_yr_post_m=a21_yr_post*(1-female)
	label var a21_yr_m 			"Male: Yr Age 21"
	label var a21_yr_post_m 	"Male: Yr Age 21 x Dictatorship"
	label var a21_yr_f 			"Female: Yr Age 21"
	label var a21_yr_post_f		"Female: Yr Age 21 x Dictatorship"
	
	* Education
	gen any_media_4p		= (Tipo>=12)|(Tipo>=3&Tipo<=11&Curso>=4)			
	gen any_college		= Tipo==14
	gen yrs_college=Curso if Tipo==14
	* zero years of college for those with full secondary who never enrolled
	replace yrs_college=0 if yrs_college==.&any_media_4p==1

	* Household status (missing when Parentesco is 99)
	gen jefe=Parentesco==1 if Parentesco!=99
	gen hijo=inlist(Parentesco,4) if Parentesco!=99
	gen hermano=inlist(Parentesco,7) if Parentesco!=99
	
	* Labor force participation; unemployment is only defined inside the labor force
	gen lfp=inlist(trabajo,1,2,3,4,5)
	gen unemployed=inlist(trabajo,4,5)
	replace unemployed=. if lfp==0

	* Occupation: string version of the code, left-padded for the two short codes,
	* then one- and two-character prefixes
	tostring ocupacion, gen(occup)
	replace occup="0000" if occup=="0"
	replace occup="0110" if occup=="110"
	gen occ1=substr(occup,1,1)
	gen occ2=substr(occup,1,2)
	replace occ1="11" if occ2=="01"
	replace occ1="99" if occ2=="99"
	
	* Migration (defined only when the three comuna codes differ from 99999)
	gen migration_abroad = Comuna_1987 <=998 if comuna!=99999&comuna_nac!=99999&Comuna_1987!=99999

	* Attach occupation-level median wages and convert them to logs
	rename occup Ocupacion
	merge m:1 Ocupacion using "${raw}/EXTRA/Occup Personas Censo - Wage"	// Source: Own calculations based on CASEN
	drop if _merge == 2 
	drop _merge 
	foreach w of varlist median_wage_9200_3 median_wage_9296_3 median_wage_9217_3 {
		replace `w' = ln(`w')
	}

	* One dummy per occupation group; missing for codes 0, 9998 and 9999
	levelsof occ1, local (occs)
	foreach x of local occs{
		gen Docc`x'=(occ1=="`x'") if ocupacion!=0&ocupacion!=9998&ocupacion!=9999
	}

	* Score quintile dummies (Quin==0 is left missing)
	forvalues x = 1/5{
		gen Dqui`x'=(Quin==`x') if Quin!=0
	}
	
	egen Dqui2_4 = rowtotal(Dqui2 Dqui3 Dqui4) if Quin!=0

	* Round
	g year = 1992

	* Analysis vars
	egen id_comuna_female = group(comuna_nac female)
	egen id_cluster_cohort = group(age_21)

	* Keep main variables (migration_abroad written in full rather than abbreviated)
	keep year comuna comuna_nac hh_id jefe hijo hermano age_21 a21* edad female any* yrs_college lfp unemployed Dqui* occ1 id* migration_abroad median_wage* Docc*

	* Cohort means for the long-series file; college is conditional on full secondary
	preserve
		gen any_college_fm=any_college
		replace any_college_fm=. if any_media_4p==0
		collapse (mean) any_media_4p_1992=any_media_4p any_college_fm_1992=any_college_fm,by(age_21)
		tempfile cohort_1992
		save `cohort_1992'
	restore

	compress

	* Robustness bandwidth
	keep if age_21>=1961 & age_21<=1984
	save "${int}/Data_Figure_A9_1992.dta",replace

	* Only leave relevant cohorts
	keep if age_21>=1964 & age_21<=1981
	
	* Save
	save "${int}/censo1992.dta",replace
}	
	***************
	//	ii. Census 2002
	***************
{
	* Person-level extract of the 2002 census. Saves the full working file as
	* tempfile migration_2002, the cohort means as tempfile cohort_2002, and the
	* main cohort window (age_21 1964-1981) as censo2002.dta.
	use "${raw}/CENSUS/Personas Censo 2002.dta", clear

	* Harmonise questionnaire codes to the names used for the other rounds
	rename P23B comuna
	rename P18  gender
	rename P22B comuna_nac
	rename P24B comuna_97
	rename P29  trabajo
	rename P30  patron
	rename P31  ocupacion
	rename P19  edad

	* Only use people born in Chile
	* (the number of observations coded 3 equals the number reporting a year of arrival in the country)
	drop if P22A == 3

	* Household ID
	egen hh_id = group(Portafolio VN HN)

	* Female (missing when gender is neither 1 nor 2)
	generate female = cond(gender == 2, 1, cond(gender == 1, 0, .))

	* Cohort: year the person turned 21, centred on 1972, plus post-1972 trend
	generate age_21      = 2002 - (edad - 21)
	generate a21_yr      = age_21 - 1972
	generate a21_yr_post = a21_yr * (age_21 > 1972)
	label variable a21_yr      "Yr Age 21"
	label variable a21_yr_post "Yr Age 21 x Dictatorship"

	* Education
	generate any_media_4p = (P26A >= 13) | (inrange(P26A, 5, 12) & P26B >= 4)
	generate any_college  = (P26A == 15)

	* Labor force participation; unemployment only defined inside the labor force
	generate lfp        = inlist(trabajo, 1, 2, 3, 4, 5)
	generate unemployed = inlist(trabajo, 3, 5)
	replace  unemployed = . if lfp == 0

	* Occupation: codes 20 to just under 30, missing when the code is 0
	generate Docc2 = (ocupacion >= 20 & ocupacion < 30) if ocupacion != 0

	* Household status
	generate jefe    = (P17 == 1)
	generate hijo    = (P17 == 4)
	generate hermano = (P17 == 8)

	* Round
	generate year = 2002

	* Analysis vars
	egen id_comuna_female  = group(comuna_nac female)
	egen id_cluster_cohort = group(age_21)

	* Snapshot of the data before variables are dropped
	tempfile migration_2002
	save `migration_2002'

	* Keep main variables
	keep year hh_id jefe hijo hermano comuna comuna_nac Docc2 age_21 a21* edad female any* lfp unemployed id*

	* Cohort means; college is conditional on full secondary
	preserve
		generate any_college_fm = any_college
		replace  any_college_fm = . if any_media_4p == 0
		collapse (mean) any_media_4p_2002 = any_media_4p any_college_fm_2002 = any_college_fm, by(age_21)
		tempfile cohort_2002
		save `cohort_2002'
	restore

	* Only leave relevant cohorts
	keep if inrange(age_21, 1964, 1981)

	* Save
	compress
	save "${int}/censo2002.dta", replace
}

	***************
	//	iii. Census 2017
	***************
{
	* Person-level extract of the 2017 census. Saves the cohort means as
	* tempfile cohort_2017 and the main cohort window (age_21 1964-1981) as
	* censo2017.dta. The attainment dummies carry a _17 suffix in this round;
	* they are always referred to by their full names below instead of through
	* variable abbreviation.
	use "${raw}/CENSUS/Personas Censo 2017.dta",clear

	rename p09 edad

	* Only use people born in Chile
	drop if p12>=3&p12<=8

	* Household ID
	egen hh_id=group(region provincia comuna dc area zc_loc id_zona_loc nviv nhogar)
	
	* Female (missing when p08 is neither 1 nor 2)
	g       female = 1 if p08 == 2
	replace female = 0 if p08 == 1

	* Cohort: year the person turned 21, centred on 1972, plus post-1972 trend
	g age_21 = 2017 - (edad - 21)
	g a21_yr = age_21-1972
	g a21_yr_post = a21_yr*(age_21>1972)
	label var a21_yr 		"Yr Age 21"
	label var a21_yr_post	"Yr Age 21 x Dictatorship"

	* County of birth: current comuna when p12==1, reported comuna when p12==2
	g       comuna_nac = comuna    if p12 == 1
	replace comuna_nac = p12comuna if p12 == 2
		
	* Education (code 99 in the inputs is left missing)
	gen any_education_17	= p13<3 if p13!=99
	gen any_media_4p_17	= (any_education_17==1&p15>=11)|(any_education_17==1&p15>=7&p15<=10&p14>=4)  if p13!=99&p14!=99&p15!=99		
	gen any_college_17		= any_education_17==1&(p15==12|p15==13|p15==14) if p13!=99&p15!=99
	
	* Household status
	gen jefe=p07==1
	gen hijo=inlist(p07,5)
	gen hermano=inlist(p07,7)	
			
	* Round
	g year = 2017
	
	* Analysis vars
	egen id_comuna_female = group(comuna_nac female)
	egen id_cluster_cohort = group(age_21)

	* Keep main variables
	keep year hh_id jefe hijo hermano comuna comuna_nac age_21 a21* female any* id*

	* Cohort means; college is conditional on full secondary.
	* Output names (any_media_4p_2017, any_college_fm_2017) are unchanged.
	preserve
		gen any_college_fm=any_college_17
		replace any_college_fm=. if any_media_4p_17==0
		collapse (mean) any_media_4p_2017=any_media_4p_17 any_college_fm_2017=any_college_fm,by(age_21)
		tempfile cohort_2017
		save `cohort_2017'
	restore

	* Only leave relevant cohorts
	keep if age_21>=1964 & age_21<=1981
	
	* Save
	compress
	save "${int}/censo2017.dta",replace

}

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
///////////////////////////			CASEN			////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

{
	* This section creates subsamples for each CASEN wave with info on educational attainment
	* It is then all merged and collapsed by cohort to be merged with other data

	***************
	// 2017 and 2015 (identical questionnaire codes, so handled in one loop)
	***************

	foreach wave in 2017 2015 {

		use "${raw}/CASEN/casen`wave'.dta", clear

		* Education (codes of 99 or above, and missing values, are left missing)
		generate any_media_4p = (e6a >= 12) | (inrange(e6a, 8, 11) & e6b >= 4) if e6a < 99 & e6b < 99
		generate any_college  = (e6a >= 14) if e6a < 99

		* Females (missing when sexo is neither 1 nor 2)
		generate female = cond(sexo == 2, 1, cond(sexo == 1, 0, .))

		* Labor force participation/unemployment
		generate lfp        = (o1 == 1 | o2 == 1 | o3 == 1 | o6 == 1)
		generate unemployed = (o1 == 2 & o2 == 2 & o3 == 2 & o6 == 1)

		* We checked and in all other years cor = aj; the 2013-2017 waves only
		* differ in the variable name, so the names are aligned here
		rename yautcor yautaj
		rename ysub    ysubaj

		* Keep main variables
		keep any* edad comuna female yautaj ysubaj lfp unemployed qaut

		* Round
		generate year = `wave'

		* Save
		compress
		tempfile casen`wave'
		save `casen`wave''
	}
	***************
	// 2013
	***************

	use "${raw}/CASEN/casen2013.dta", clear

	* Education (codes of 99 or above, and missing values, are left missing)
	generate any_media_4p = (e6a >= 11) | (inrange(e6a, 7, 10) & e6b >= 4) if e6a < 99 & e6b < 99
	generate any_college  = (e6a >= 12) if e6a < 99

	* Females (missing when sexo is neither 1 nor 2)
	generate female = cond(sexo == 2, 1, cond(sexo == 1, 0, .))

	* Labor force participation/unemployment
	generate lfp        = (o1 == 1 | o2 == 1 | o3 == 1 | o6 == 1)
	generate unemployed = (o1 == 2 & o2 == 2 & o3 == 2 & o6 == 1)

	* We checked and in all other years cor = aj; the 2013-2017 waves only
	* differ in the variable name, so the names are aligned here
	rename yautcor yautaj
	rename ysub    ysubaj
	rename qaut_mn qaut

	* Keep main variables
	keep any* edad comuna female yautaj ysubaj lfp unemployed qaut

	* Round
	generate year = 2013

	* Save
	compress
	tempfile casen2013
	save `casen2013'

	***************
	// 2011
	***************

	use "${raw}/CASEN/casen2011.dta", clear

	* Education (this wave uses e6c where 2013 uses e6b)
	generate any_media_4p = (e6a >= 11) | (inrange(e6a, 7, 10) & e6c >= 4) if e6a < 99 & e6c < 99
	generate any_college  = (e6a >= 12) if e6a < 99

	* Females (missing when sexo is neither 1 nor 2)
	generate female = cond(sexo == 2, 1, cond(sexo == 1, 0, .))

	* Labor force participation/unemployment
	generate lfp        = (o1 == 1 | o2 == 1 | o3 == 1 | o6 == 1)
	generate unemployed = (o1 == 2 & o2 == 2 & o3 == 2 & o6 == 1)

	* Keep main variables
	keep any* edad comuna female yautaj ysubaj lfp unemployed qaut

	* Round
	generate year = 2011

	* Save
	compress
	tempfile casen2011
	save `casen2011'

	***************
	// 2009 and 2006 (same coding; only the education variable names differ)
	***************

	* Education level / course variables per wave
	local lvl_2009 e7t
	local crs_2009 e7c
	local lvl_2006 e8t
	local crs_2006 e8c

	foreach wave in 2009 2006 {
		local lvl `lvl_`wave''
		local crs `crs_`wave''

		use "${raw}/CASEN/casen`wave'.dta", clear

		* Education (codes of 99 or above, and missing values, are left missing)
		generate any_media_4p = (`lvl' >= 9 & `lvl' < 16) | (inrange(`lvl', 5, 8) & `crs' >= 4) if `lvl' < 99 & `crs' < 99
		generate any_college  = (`lvl' >= 13 & `lvl' <= 15) if `lvl' < 99

		* Females (missing when sexo is neither 1 nor 2)
		generate female = cond(sexo == 2, 1, cond(sexo == 1, 0, .))

		* Labor force participation/unemployment
		generate lfp        = (o1 == 1 | o2 == 1 | o3 == 1 | o4 == 1)
		generate unemployed = (o1 == 2 & o2 == 2 & o3 == 2 & o4 == 1)

		* Keep main variables
		keep any* edad comuna female yautaj ysubaj lfp unemployed qaut

		* Round
		generate year = `wave'

		* Save
		compress
		tempfile casen`wave'
		save `casen`wave''
	}

	***************
	// 2003, 2000, 1998 and 1996
	// (same coding; only the education variable names differ across waves)
	***************

	* Education level / course variables per wave
	local lvl_2003 e7t
	local crs_2003 e7c
	local lvl_2000 e9
	local crs_2000 e8
	local lvl_1998 e6
	local crs_1998 e5
	local lvl_1996 e6
	local crs_1996 e5

	foreach wave in 2003 2000 1998 1996 {
		local lvl `lvl_`wave''
		local crs `crs_`wave''

		use "${raw}/CASEN/casen`wave'.dta", clear
		rename comu comuna

		* Education (codes of 99 or above, and missing values, are left missing)
		generate any_media_4p = (`lvl' >= 9 & `lvl' < 16) | (inrange(`lvl', 5, 8) & `crs' >= 4) if `lvl' < 99 & `crs' < 99
		generate any_college  = (`lvl' >= 13 & `lvl' <= 15) if `lvl' < 99

		* Females (missing when sexo is neither 1 nor 2)
		generate female = cond(sexo == 2, 1, cond(sexo == 1, 0, .))

		* Labor force participation/unemployment
		generate lfp        = (o1 == 1 | o2 == 1 | o3 == 1)
		generate unemployed = (o1 == 2 & o2 == 2 & o3 == 1)

		* Keep main variables
		keep any* edad comuna female yautaj ysubaj lfp unemployed qaut

		* Round
		generate year = `wave'

		* Save
		compress
		tempfile casen`wave'
		save `casen`wave''
	}

	***************
	// 1994, 1992 and 1990
	// (education is coded differently in each wave; everything else is shared)
	***************

	* 1990 must stay last: it is the dataset left in memory for the append step
	foreach wave in 1994 1992 1990 {

		use "${raw}/CASEN/casen`wave'.dta", clear
		rename comu comuna

		* Education
		if `wave' == 1994 {
			generate any_media_4p = (e9 >= 9 & e9 < 14) | (inrange(e9, 5, 8) & e8 >= 4) if e9 < 99 & e8 < 99
			generate any_college  = inlist(e9, 9, 10, 13) if e9 < 99
		}
		else if `wave' == 1992 {
			generate any_media_4p = (e8 >= 6 & e8 < 11) | (inrange(e8, 3, 5) & e7 >= 4 & e7 < 9) if e8 < 13 & e7 < 99
			generate any_college  = inlist(e8, 6, 7, 10) if e8 < 13
		}
		else {
			generate any_media_4p = (e4 >= 5 & e4 < 7) | (inrange(e4, 3, 4) & e3 >= 4 & e3 < 9) if e4 < 10 & e3 < 10
			generate any_college  = (e4 == 5) if e4 < 10
		}

		* Females (missing when sexo is neither 1 nor 2)
		generate female = cond(sexo == 2, 1, cond(sexo == 1, 0, .))

		* Labor force participation/unemployment
		generate lfp        = (o1 == 1 | o2 == 1 | o3 == 1)
		generate unemployed = (o1 == 2 & o2 == 2 & o3 == 1)

		* Keep main variables
		keep any* edad comuna female yautaj ysubaj lfp unemployed qaut

		* Round
		generate year = `wave'

		* Save
		compress
		tempfile casen`wave'
		save `casen`wave''
	}

	***************	
	// APPEND ALL
	// Stacks every wave onto the 1990 data currently in memory, derives the
	// cohort, income and quintile variables, and writes:
	//   - tempfile cohort_casen (cohort means)
	//   - Data_Figure_A9_casen.dta (age_21 1961-1984)
	//   - casen_all.dta            (age_21 1964-1981)
	***************	
	
	foreach x of numlist 1992 1994 1996 1998 2000 2003 2006 2009 2011 2013 2015 2017{
		append using `casen`x''
	}

	* Cohort: year the person turned 21, centred on 1972, with pre/post trends
	* and gender-specific versions
	g age_21 = year - (edad - 21)
	g a21_yr = age_21-1972
	g a21_yr_pre = a21_yr*(age_21<=1972)
	g a21_yr_post = a21_yr*(age_21>1972)
	g a21_yr_f=a21_yr*female
	g a21_yr_post_f=a21_yr_post*female
	g a21_yr_m=a21_yr*(1-female)
	g a21_yr_post_m=a21_yr_post*(1-female)
	label var a21_yr 		"Yr Age 21"
	label var a21_yr_post	"Yr Age 21 x Dictatorship"

	* College attainment interacted with each cohort.
	* The parentheses matter: * binds tighter than ==, so the unparenthesised
	* form any_college*age_21==`y' compares the product with `y' and returns 0
	* (not missing) whenever any_college is missing.
	forvalues y=1964/1981{
		gen any_college_`y'=any_college*(age_21==`y')
	}

	* Income quintile dummies (qaut equal to 0 or missing is left missing)
	forvalues x =1/5{
		gen Dqaut_`x'=qaut==`x' if qaut!=0&qaut!=.
	}
	egen Dqaut_2_4 = rowtotal(Dqaut_2 Dqaut_3 Dqaut_4) if qaut!=0&qaut!=.

	* Add cpi: FP.CPI.TOTL	Consumer price index (2010 = 100). Source: WDI (https://data.worldbank.org/indicator/FP.CPI.TOTL?locations=CL)
	* The index is rebased so that 2015 = 1
	merge m:1 year using "${raw}/EXTRA/wb_ipc.dta"
	drop if _m == 2
	g aux = ipc if year == 2015
	egen ipc_15 = mean(aux)
	replace ipc = ipc/ipc_15
	drop aux _m				
	
	* Income in real terms
	foreach x of varlist yautaj ysubaj{
		gen `x'_r=`x'/ipc
	}
	
	* Total income: winsorised at the 1st/99th percentiles (winsor2 creates
	* ytotalaj_r_w), divided by 1000, then logged
	egen ytotalaj_r=rowtotal(yautaj_r ysubaj_r)
	winsor2 ytotalaj_r, c(1 99)
	replace ytotalaj_r_w=ytotalaj_r_w/1000	
	gen ln_ytotalaj_r_w=ln(ytotalaj_r_w)

	drop ipc*
	
	* Unemployment is only defined inside the labor force
	replace unemployed=. if lfp==0
	
	egen id_comuna_female = group(comuna female)
	egen id_cluster_cohort = group(age_21)
	
	* Cohort means for the long-series file; college is conditional on full secondary
	preserve
		gen any_college_fm=any_college
		replace any_college_fm=. if any_media_4p==0
		collapse any_media_4p any_college_fm,by(age_21)
		tempfile cohort_casen
		rename any_media_4p any_media_4p_casen
		rename any_college_fm any_college_fm_casen			
		save `cohort_casen'
	restore

	compress

	* Robustness bandwidth
	keep if age_21>=1961 & age_21<=1984
	save "${int}/Data_Figure_A9_casen.dta",replace

	* Only leave relevant cohorts
	keep if age_21>=1964 & age_21<=1981
	* Save
	save "${int}/casen_all.dta",replace
}
	
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
///////////////////////////			OTHERS			////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////
/// APPEND COHORTS FROM DIFFERENT SOURCES: LONGER SERIES ///
////////////////////////////////////////////////////////////
{
	* One row per cohort (age_21), with the attainment means from each source
	* side by side. Starts from the 1992 census and merges the others in turn.
	use `cohort_1992', clear
	foreach src in 2002 2017 casen {
		merge 1:1 age_21 using `cohort_`src'', nogen
	}
	save "${int}/cohort_all_sources.dta",replace
}

/////////////
/// IPUMS ///
/////////////
{	
	* Converts every IPUMS extract (*.csv) found in ${raw}/IPUMS into one
	* cohort-level file per country and census year (1988-1997 only), saved in
	* ${temp} as ip_<country code>_<year>.dta.
	import excel "${raw}/IPUMS/names-directory.xlsx", clear first
	tempfile merged_names
	save `merged_names'

	local dir_list : dir "${raw}/IPUMS" files "*.csv" 
	dis `dir_list'

	foreach pais of local dir_list {
		dis "**** `pais' ****"
	
		*** Import data:	
		import delimited "${raw}/IPUMS/`pais'", clear
		keep if year>=1988&year<=1997
		levelsof year, local(years)
		if "`years'"!=""{
			**** Save temporary dataset for each year:
			save "${temp}/temp-pais", replace

			*** Loop over years:
			foreach yr of local years {
				clear 
				*** Open temp dataset:
				use "${temp}/temp-pais", clear

				keep if year == `yr'

				*** Merge with names:
				merge m:1 country using `merged_names', keep(1 3) nogen

				*** Merge w/ country code (fall back on COUNTRY when no code is found):
				merge m:1 COUNTRY using "${raw}/IPUMS/correspondance_country_name_codes.dta", ///
				keep(1 3) keepusing(A3UN) nogen
				replace A3UN = COUNTRY if A3UN == ""
				order country COUNTRY A3UN	

				*** Keep only variables relevant for Chile:
				*** (describe returns code 111 when the variable does not exist)
				local variables 
				foreach keep_var in country COUNTRY A3UN year pernum perwt age sex edattaind labforce occisco{
					cap des `keep_var'	
					if _rc!=111 {
						local variables `variables' `keep_var' 
					}
				}
				keep `variables'

				*** Only fix countries w/ cohort (age available and a country code):
				local c_code = A3UN[1]
				cap des age  
				if _rc!=111 & "`c_code'" !="" {
					*** Fix each variable:
					gen  countrycode="`c_code'"
					label var countrycode "Country Code"

					**** Cohort:
					drop if age == 999 // No info on age
					drop if age<20

					gen cohort=year-age 
					label var cohort "Cohort: year - age"

					**** Gender:
					* NOTE(review): any sex code other than 1 or 2 is left unchanged
					* and enters the cohort mean; IPUMS appears to use 9 for
					* "unknown" - confirm none are present.
					recode sex (1=0) (2=1)
					label var sex "Sex: Female"
		  
					**** Population in age to work:
					gen agework=1 if age>18 & age<66 
					recode agework .=0
					label var agework "Working Age"
					drop age

					**** Education:
					* Only edattaind survives the keep above, so it is named in
					* full here. The previous spelling "edattain" only resolved
					* through variable abbreviation.
					cap des edattaind  
					if _rc!=111 {
						***** University: 
						gen full_college=1 if inrange(edattaind,400,499)
						recode full_college .=0
						label var full_college "Full College"
					}

					**** Labor Force Participation:
					cap des labforce 
					if _rc!=111 {
						***** Labor Force:
						gen lfp = 1 if labforce == 2
						recode lfp .=0 
						label var lfp "In Labor Force"		
						drop labforce
					} 
					
					***** Professionals:	
					cap des occisco
					if _rc!=111 {
						gen occ_prof = 1 if occisco == 2
						recode occ_prof .=0   
						label var occ_prof "Professional"		
					}
					
					**** Collapse: list of outcome variables present in this extract
					local variables 
					foreach keep_var in sex full_college lfp occ_prof{
						cap des `keep_var'
						if _rc!=111 {
							local variables `variables' `keep_var'
						}	
					}

					***** Save labels (collapse replaces them); default to the variable name:
					foreach v of var * {
						local l`v' : variable label `v' 
						if `"`l`v''"' == "" {
							local l`v' "`v'"
						}
					}
		 
					collapse (mean) year country `variables' [iw=perwt], by(cohort countrycode) 

					* An outcome that is 0 for every cohort is treated as not collected
					foreach xvar of local variables {
						quietly: sum `xvar'
						if r(mean) == 0 & r(sd) == 0 {
							replace `xvar' = .
						} 
					}

					***** Assign labels:
					foreach v of var * {
						label var `v' `"`l`v''"'
					}

					compress

					local name = countrycode[1]
					save "${temp}/ip_`name'_`yr'", replace
				}

			}

			* Remove the scratch file so it is not picked up when ${temp} is listed later
			erase "${temp}/temp-pais.dta" 
	
		}
	}
	
	* Stack every file found in ${temp} and build the comparison panel of
	* censuses taken around Chile's 1992 round.
	clear 
	* Extract dataset names:  
	local dir_list : dir "${temp}" files "*.dta" 
	dis `dir_list'
																				
	* Append all countries
	foreach pais of local dir_list {
		append using "${temp}/`pais'", force
	}

	* LATAM (two inlist() calls because string lists are capped at ten arguments):
	generate latam = inlist(countrycode, "ARG", "BOL", "BRA", "CHL", "COL", "CRI", "CUB", "DOM", "ECU") | ///
	                 inlist(countrycode, "HND", "HTI", "MEX", "NIC", "PAN", "PER", "PRY", "SLV", "URY")
	levelsof countrycode if latam==1 

	* Merge w/ Dictatorship Data; dict_5090 flags countries found in that file
	merge m:1 countrycode using "${raw}/IPUMS/Dictatorships_50-90", gen(merge_dict) keepusing(NAME YEARS YEAR1 YEAR2)
	generate dict_5090 = (merge_dict == 3)
	drop if merge_dict == 2

	rename country CountryID

	* Keep years of interest:
	keep if cohort >= 1930 & cohort <= 1978

	* Generate sample of relevant censuses:

	*** Chile 1992:
	tab countrycode year if inrange(year, 1988,1997) 

	generate sample_1992 = inrange(year, 1988, 1997)
	tab countrycode if sample_1992 ==1, sort
	duplicates tag countrycode cohort if sample_1992==1, gen(drp)
	tab countrycode year if drp == 1
	
	**** Keep closest census to Chile if more than one:
	replace sample_1992 = 0 if inlist(countrycode, "IDN", "MEX", "PHL") & year == 1995
	replace sample_1992 = 0 if countrycode == "IRL"

	* Drop countries with incomplete info (fewer than 39 cohorts in a census):
	bysort CountryID year: egen non_m = count(cohort) if sample_1992 == 1
	tab non_m, m
	replace sample_1992 = 0 if non_m < 39 // Incomplete data
	drop non_m drp
	keep if sample_1992 == 1
	
	codebook countrycode				 
	sort countrycode year cohort
	order countrycode cohort year 	
	
	save "${int}/clean-ipums-panel", replace	
}

///////////
/// EOD ///
///////////
{
	clear

	*-------------------------------------------------------------------------------*
	* Import datasets: no data for 2006
	* Each yearly June file gets a year variable and is stored as a tempfile;
	* the files are then stacked in chronological order.
	*-------------------------------------------------------------------------------*

	foreach j of numlist 1957/2005 2007/2018 {
		use "${raw}/EOD/junio`j'", clear
		generate year = `j'
		tempfile eod_`j'
		save `eod_`j''
	}

	use `eod_1957', clear

	foreach j of numlist 1958/2005 2007/2018 {
		append using `eod_`j''
	}

	* Save
	compress
	save "${int}/eod_all.dta", replace
}

//////////////////////////////////////////////////////////////////////
/// 1988 VOTER REGISTRATION and 2017 VOTERS ABROAD ///
//////////////////////////////////////////////////////////////////////
{
	* Two cohort-level series built from the 2017 turnout file:
	*   - Data_Figure_6.dta: share whose fecha is 1987 or 1988 (ins_pleb)
	*   - voters_abroad.dta: share with pais different from 14 (migrant)
	* Both keep cohorts turning 21 in 1964-1981 and carry the trend variables
	* age_21_pre / age_21_post / age_21_all centred on 1972.

	* Voters registered in 1988
	use "${raw}/EXTRA/turnout_2017_full.dta",clear
	keep if nacionalidad==1&round==1

	gen ins_pleb=fecha==1987|fecha==1988 
	 
	* Year the voter turned 21. (a21_yr and a21_yr_post used to be generated
	* here as well, but collapse discards them, so they are no longer created.)
	g age_21 = 2017 - (edad - 21)
	collapse ins_pleb,by(age_21)

	gen age_21_pre=cond(age_21<=1972,age_21 - 1972,0)
	gen age_21_post=cond(age_21>1972,age_21 - 1972,0)
	gen age_21_all=age_21 - 1972

	keep if age_21>=1964&age_21<=1981

	compress 
	save "${int}/Data_Figure_6.dta", replace

	* Voters abroad
	use "${raw}/EXTRA/turnout_2017_full.dta",clear

	keep if round==1
	g age_21 = 2017 - (edad - 21)
	gen migrant=pais!=14
	collapse migrant, by (age_21)

	keep if age_21>=1964&age_21<=1981
	gen age_21_pre=cond(age_21<=1972,age_21 - 1972,0)
	gen age_21_post=cond(age_21>1972,age_21 - 1972,0)
	gen age_21_all=age_21 - 1972
	save "${int}/voters_abroad.dta", replace
}

////////////////////////////////
/// 1988 PLEBISCITE: NO VOTE ///
////////////////////////////////
{
	* Estimates a comuna-specific trend and post-1972 kink in college enrollment
	* from the 1992 census, shrinks the estimates by their precision, and merges
	* them with the plebiscite data.
	use "${int}/censo1992.dta", clear

	* Drop people abroad and unknown county + w/o full secondary
	keep if comuna >= 1100 & comuna < 99999
	keep if any_media_4p == 1

	* Estimates comuna-specific kinks in college
	regress any_college i.comuna#c.a21_yr i.comuna#c.a21_yr_post, vce(cluster comuna)
	tempfile kink_comuna
	parmest, saving(`kink_comuna', replace)

	* Reshapes results by comuna: the comuna code is read from the first five
	* characters of the parameter name, and post flags the a21_yr_post terms
	* (names ending in "t")
	use `kink_comuna', clear
	drop if parm == "_cons"
	keep parm estimate stderr
	generate comuna = substr(parm, 1, 5)
	destring comuna, replace ignore("." "b" "o")
	generate post = (substr(parm, -1, 1) == "t")
	drop parm
	reshape wide estimate stderr, i(comuna) j(post)
	* discard codes ending in "00"
	drop if substr(string(comuna), -2, 2) == "00"

	** Adjusts estimates of kink for precision // shrinkage of estimates
	forvalues k = 0/1 {
		generate var`k' = stderr`k'^2
		egen aux`k' = mean(var`k')
	}
	forvalues k = 0/1 {
		summarize estimate`k'
		generate SD`k' = sqrt(`r(sd)'^2 - aux`k')
		* second summarize kept so the log matches the original run
		summarize estimate`k'
		generate estimate`k'_adj = estimate`k' * (SD`k'^2 / (SD`k'^2 + var`k'))
	}
	drop var* aux* SD*

	* Combine w/ data from plebiscite
	generate code = comuna
	merge 1:m code using "${raw}/EXTRA/plebiscite_ajps"
	drop if _merge == 2
	drop _merge
	encode Region, gen(reg)
	replace Pop70 = Pop70/1000

	* Run full regression to get estimating sample
	regress VoteShareNo estimate1_adj Pop70 sh_rural_70 sh_women_70 DistStgo DistRegCapital DistProvCapital i.reg share_allende70_v2, vce(robust)
	generate sample = 1 if e(sample) == 1

	* Standardize impact measure within the estimating sample
	summarize estimate1_adj if e(sample) == 1
	generate estimate1_adj_std = (estimate1_adj-`r(mean)')/`r(sd)'

	save "${int}/Data_Plebiscite.dta", replace
}

//////////////////////////////////////////////////////////////////////////
/// WDI and FREEDOM HOUSE ON POLITICAL REGIMES AND TERTIARY ENROLLMENT ///
//////////////////////////////////////////////////////////////////////////
{
	* Builds a country-by-decade file with tertiary enrollment, GDP per capita
	* and a Freedom House score normalised to [0,1].

	* Data sourced from WB WDI using command: wbopendata, language(en - English) indicator(SE.TER.ENRR;NY.GDP.PCAP.KD) long clear
	* saved immediately as "wb_wdi_gdp_enrollment.dta"
	* last accessed 09/02/2024
	 
	use "${raw}/EXTRA/wb_wdi_gdp_enrollment.dta",clear

	drop if regionname == "Aggregates" 
	drop admin* region regionname lendingtype lendingtypename
	drop if incomelevel==""

	*We download raw data from https://freedomhouse.org/sites/default/files/2020-02/2020_Country_and_Territory_Ratings_and_Statuses_FIW1973-2020.xlsx (last accessed on 12/03/2020)
	*Relevant information is in the "Country Ratings, Statuses" sheet, but headers complicate importing into Stata, so we fixed by hand and saved as FIW.csv
	*Periodicity is a bit irregular in the 1980s. e.g., data is missing for 1982 (i.e. divided between 1981 and 1983)

	* The WDI data are parked with preserve while the Freedom House file is
	* reshaped and saved as a tempfile
	preserve
		clear
		import delimited "${raw}/EXTRA/FIW.csv", varnames(1) encoding(ISO-8859-2) 
		*Wide to long
		reshape long pr_ cl_ status_, i(cntry_name) j(year)	
		*ISO3C code (kountry is a user-written command: ssc install kountry)
		kountry cntry_name, from(other) stuck
		rename _ISO3N_ _ISO3N_1
		kountry _ISO3N_1, from(iso3n) to(iso3c)
		drop _ISO3N_1
		rename _ISO3C_ cntcode3
		*Manually input empty missing codes
		*For countries with merges (e.g. East and West Germany), I am averaging across the units merged
		replace cntcode3 = "CPV" if cntry_name == "Cabo Verde"
		replace cntcode3 = "CZE" if cntry_name == "Czechoslovakia"
		replace cntcode3 = "SWZ" if cntry_name == "Eswatini"
		replace cntcode3 = "DEU" if cntry_name == "Germany, E. "
		replace cntcode3 = "DEU" if cntry_name == "Germany, W. "
		replace cntcode3 = "XKX" if cntry_name == "Kosovo"
		replace cntcode3 = "MNE" if cntry_name == "Montenegro"
		replace cntcode3 = "MKD" if cntry_name == "North Macedonia"
		replace cntcode3 = "SSD" if cntry_name == "South Sudan"
		replace cntcode3 = "TWN" if cntry_name == "Taiwan"
		replace cntcode3 = "VNM" if cntry_name == "Vietnam, N."
		replace cntcode3 = "VNM" if cntry_name == "Vietnam, S."	
		replace cntcode3 = "YEM" if cntry_name == "Yemen, N."
		replace cntcode3 = "YEM" if cntry_name == "Yemen, S."
		*Rename variables
		rename cntcode3 countrycode
		rename cntry_name countryname
		rename pr_ fiw_pr
		rename cl_ fiw_cl
		*Replace wrong values
		replace fiw_pr = "2" if fiw_pr == "2(5)"
		replace fiw_pr = "" if fiw_pr == "-"
		replace fiw_cl = "3" if fiw_cl == "3(6)"
		replace fiw_cl = "" if fiw_cl == "-"
		destring fiw_pr fiw_cl, replace
		*create final dataset (the mean averages units that share a country code)
		* NOTE(review): names that still have an empty code at this point are
		* pooled under countrycode "" by this collapse - confirm none remain.
		collapse (mean) fiw_pr fiw_cl, by(countrycode year)
		gen fiw_unadj= 0.5*(fiw_pr + fiw_cl)
		gen fiw=(fiw_unadj-1)/6	// Normalized measure between 0 and 1.
		keep countrycode year fiw
		*create tempfile  for merge
		tempfile fiw_merge
		save `fiw_merge'
	restore
	merge 1:1 countrycode year using `fiw_merge'
	//23 small island nations missing in FH
	//FH missing data before 1971 and after 2019 for all countries
	//Taiwan missing in WDI

	*aggregate to decade (GDP variable named in full rather than abbreviated as ny_gdp)
	gen decade = 10 * floor(year/10)
	collapse se_ter_enrr ny_gdp_pcap_kd fiw,by(countrycode countryname decade)
	drop if decade==1960|decade==2020 //not enough data
	gen ln_gdp_pc=ln( ny_gdp_pcap_kd )

	compress
	save "${int}/wdi_fh.dta",replace
}

//////////////////////
/// MACRO CONTROLS ///
//////////////////////
{
	* Builds "${int}/macro.dta": one row per age_21 value (1964-1981) holding
	* youth unemployment, youth public employment, GDP per capita growth,
	* the count of newly constituted unions, and the variables stored in diaz_gvt.dta.
	* All variable names are written out in full so the section also runs under
	* -set varabbrev off-.
	use "${int}/eod_all.dta",clear

	*youth unemployment
	* Share with sitocup1 equal to 6 or 7 among ages 16-25, excluding sitocup1
	* codes 2, 4 and >=9 (missing included) from the denominator.
	* NOTE(review): the meaning of the sitocup1 codes is not visible here — confirm against the EOD codebook.
	preserve
		gen unemployed=100*(sitocup1==6|sitocup1==7) if sitocup1!=2&sitocup1!=4&sitocup1<9
		collapse unemployed if edad>=16&edad<=25,by(year)
		keep if year>=1964&year<=1981
		rename year age_21
		rename unemployed youth_u
		replace youth_u=youth_u/100	// back from percent to a 0-1 share
		tempfile unemp
		save `unemp'
	restore
	
	*youth public employment
	* Share with actecon==50 among ages 16-25 with sitocup1==1.
	* This collapse replaces the EOD microdata in memory; the next step loads a new file.
	gen public =actecon==50
	collapse public if sitocup1==1&edad>=16&edad<=25,by(year)
	keep if year>=1964&year<=1981
	rename year age_21
	tempfile public
	save `public'

	*GDP per capita growth. Source: WB WDI (03/19/2021)
	use "${raw}/EXTRA/wb_wdi_chile.dta",clear
	keep if age_21>=1964&age_21<=1981
	rename NY_GDP_PCAP_KD_ZG gdp_pc_gr
	replace gdp_pc_gr=gdp_pc_gr/100
	keep age_21 gdp_pc_gr
	tempfile gdp
	save `gdp'

	*New trade unions: one record per rsu, counted by year of constitution
	use "${raw}/EXTRA/sindicatos.dta",clear
	duplicates drop rsu,force
	gen date_cons = date(fechaconst, "DMY")
	format date_cons %td
	gen year_cons=yofd(date_cons)
	gen sindicatos=1
	collapse (sum) sindicatos, by(year_cons)
	keep if year_cons>=1964&year_cons<=1981
	rename year_cons age_21

	* The union counts stay in memory as the master data for the merges below,
	* so no tempfile is needed for them.
	merge 1:1 age_21 using "${raw}/EXTRA/diaz_gvt.dta", nogen // Source: Diaz et al. (2016), Tabla 3.7, ID 3037, pp 305 and 307.  
	merge 1:1 age_21 using `unemp', nogen
	merge 1:1 age_21 using `public', nogen
	merge 1:1 age_21 using `gdp', nogen

	label var gdp_pc_gr 	"GDP Growth"
	label var gvt			"Public Spending"
	label var youth_u		"Youth Unemployment"
	label var public		"Youth Gvt Employment"	
	label var sindicatos	"New Trade Unions"	

	compress
	save "${int}/macro.dta",replace
}

/////////////////
/// MIGRATION ///
/////////////////
{
	* Part 1 builds "${int}/students_abroad.dta" (yearly totals of students abroad).
	* Part 2 builds "${int}/chileans_abroad.dta" (shares abroad by age group).

	* Students abroad. Source: UNESCO Statistical yearbooks.
	clear
	import excel "${raw}/EXTRA//Unesco Students Abroad.xlsx", sheet("Sheet1") firstrow
	rename Students students
	rename Country country
	* Blank out entries by row position after sorting the string column.
	* NOTE(review): presumably these rows hold non-numeric placeholders; the
	* positions are hard-coded, so re-check them if the spreadsheet changes.
	sort students
	replace students="" in 1/306
	replace students="" in 767/771
	destring students,replace
	replace country=strrtrim(country)
	encode country,gen(ctry)

	* Region codes; countries left at 0 are dropped below
	gen region=0
	replace region=1 if (country=="Ireland"|country=="Greece"|country=="Portugal"|country=="Finland"|country=="Austria"|country=="Italy"|country=="Holy See"|country=="Denmark"|country=="Netherlands"|country=="Switzerland"|country=="Belgium"|country=="United Kingdom"|country=="Spain"|country=="Sweden"|country=="Federal Republic of Germany"|country=="France")
	replace region=2 if (country=="Poland"|country=="Czechoslovaquia"|country=="Yugoslavia"|country=="Hungary"|country=="Romania")
	replace region=3 if country=="Canada"|country=="United States of America"
	replace region=4 if (country=="Mexico"|country=="Honduras"|country=="Guatemala"|country=="Costa Rica"|country=="Panama"|country=="Cuba"|country=="Uruguay"|country=="Argentina")
	replace region=5 if (country=="Senegal"|country=="China"|country=="New Zealand"|country=="Algeria"|country=="Syrian Arab Republic"|country=="Australia"|country=="Jordan"|country=="Togo"|country=="Japan"|country=="Israel"|country=="Syria"|country=="Lebanon"|country=="Turkey"|country=="Republic of Korea"|country=="Qatar"|country=="India"|country=="Ivory Coast"|country=="Philippines")
	drop if region==0	//countries without students

	* Yearly totals: students (in thousands) and number of countries reporting a value
	gen sample=students!=.
	egen tot_students=total(students),by(year)
	egen tot_paises=total(sample),by(year)
	collapse tot_students tot_paises , by(year)	
	replace tot_students=tot_students/1000
	sort year
	save "${int}/students_abroad.dta",replace

	* Chileans abroad
	* Source: Chilenos en el exterior 2003-2004, Anexo A, Cuadro 8
	* https://web.archive.org/web/20130520150932/http://www.chilesomostodos.gov.cl/descargas/doc_download/122-anexo-estadistico-a.html
	clear
	infile edad total nada primaria media tecnica superior postgrado ignorado using "${raw}/EXTRA/chileans_abroad.txt"
	tostring edad, replace
	replace edad = "" if edad == "."

	* Age-group labels are assigned by row position (row 1 is the total row)
	local row=0
	foreach lab in total 05-09 10-14 15-19 20-24 25-29 30-34 35-39 40-44 45-49 50-54 55-59 60-64 65-69 70-74 75+ {
		local ++row
		replace edad = "`lab'" in `row'
	}

	* Round-trip the counts through strings and strip "." characters.
	* NOTE(review): presumably "." is a thousands separator in the source table — confirm.
	tostring total-ignorado,replace force usedisplayformat
	destring total-ignorado,replace ignore(".")

	* Midpoint of each five-year group (7, 12, ..., 72 for rows 2-15); 75 for the open-ended top group
	generate age_mid = .
	replace age_mid = 5*_n-3 in 2/15
	replace age_mid = 75 in 16
	g age_21 = 2003 - (age_mid - 21)
	
	* Resident counterpart by age group.
	* `migration_2002' is a tempfile created outside this section, so the
	* section cannot be run on its own.
	preserve
		use `migration_2002', clear
		* Each label is matched to ages one year below its bounds (e.g. "35-39" <- edad 34-38).
		* NOTE(review): presumably aligns 2002 ages with the 2003 register — confirm.
		gen age_group=""
		forvalues lo=35(5)70 {
			local hi=`lo'+4
			replace age_group="`lo'-`hi'" if inrange(edad,`lo'-1,`hi'-1)
		}
		gen people=1
		collapse (sum) people any_media_4p any_college if age_group!="",by(age_group)
		tempfile t1
		save `t1'
	restore
	
	rename edad age_group
	merge 1:1 age_group using `t1', keep(match) nogenerate
	
	* Share abroad = abroad / (abroad + residents), overall and by schooling level
	gen sh_total=(total)/(people+total)
	gen sh_media=(media+tecnica)/(media+tecnica+any_media_4p-any_college) //NOTE: previously this was closer to "secondary or higher"
	gen sh_college=(superior+postgrado)/(superior+postgrado+any_college)
	save "${int}/chileans_abroad.dta",replace

}

//////////////////
/// ENLISTMENT ///
//////////////////

	* Enlisted soldiers by year, with the count divided by 1,000.
	* Source: FOIA request to Chilean Ministry of Defense
	import excel "${raw}/EXTRA/Enlisted.xlsx", firstrow clear
	keep Year Enlisted
	replace Enlisted = Enlisted/1000
	compress
	save "${int}/enlistment.dta", replace

//////////////////////////
/// BACKGROUND FIGURES ///
//////////////////////////
{
	*Enrollment rates by level, converted to an index with 1970 = 100.
	*Source: Las Transformaciones Educacionales Bajo el Regimen Militar, vol 2, cuadros 4, 5 6. pp 551, 571, 582.
	use "${raw}/EXTRA/enrollment_levels.dta",clear

	foreach x of varlist sh_bas614 sh_med1519 sh_uni2024{
		* temp_base carries the 1970 value of the series on every row
		egen temp_base=max(cond(year==1970,`x',.))
		gen ind_`x'=100*`x'/temp_base
		* the raw series is replaced by its index
		drop temp* `x'
	}

	*Share of education spending by level. Source: Las Transformaciones Educacionales Bajo el Regimen Militar, vol 1, Capitulo 5, cuadro 2. pp 205.
	* Each list holds one value per year, in order, for 1969-1980.
	local v_sh_sup_educ 0.263 0.291 0.319 0.362 0.410 0.482 0.415 0.337 0.366 0.337 0.312 0.290
	local v_sh_bas_educ 0.504 0.493 0.492 0.469 0.429 0.387 0.435 0.493 0.479 0.488 0.491 0.510
	local v_sh_med_educ 0.138 0.128 0.111 0.094 0.090 0.070 0.076 0.087 0.081 0.094 0.114 0.112

	foreach s in sh_sup_educ sh_bas_educ sh_med_educ{
		gen `s'=.
		local yy=1969
		foreach val of local v_`s'{
			replace `s'=`val' if year==`yy'
			local ++yy
		}
	}

	*Total education spending and GDP in constant 1976 USD. Source: Las Transformaciones Educacionales Bajo el Regimen Militar, vol 1, Capitulo 5, cuadro 1. pp 203.
	* Each list holds one value per year, in order, for 1965-1980.
	* Spending for 1973 is left missing (223 for part of the year).
	local v_gasto_fiscal_educ 273.3 311.9 324.1 348.7 362.6 414.9 545.8 604.9 . 467.2 352.8 363.9 441.2 450.9 467.0 497.4
	local v_gdp 8240 8817 9032 9303 9625 9972 10739 10730 10342 10927 9516 9851 10822 11712 12681 13505

	foreach s in gasto_fiscal_educ gdp{
		gen `s'=.
		local yy=1965
		foreach val of local v_`s'{
			replace `s'=`val' if year==`yy'
			local ++yy
		}
	}

	* Only the spending-to-GDP ratio is kept
	gen gasto_educ_gdp=gasto_fiscal_educ/gdp
	drop gasto_fiscal_educ gdp
	
	* College openings and applicants, PAA test-takers
	*Sources: 	Openings (1970-1979) -> Echeverria (1980, p.26)
	*			Openings (1967-1971) -> La Universidad de Chile: Antecedentes e Informaciones (1971, cuadro 16, p. p.35)
	*			Openings (1973-1983) -> Las Transformaciones Educacionales Bajo el Regimen Militar, vol 2, Capitulo 11, cuadro 3. pp 340.
	*			Openings: public and private (1970-1979) -> Echeverria (1980, p.26)
	*			PAA test-takers and applicants (1967-1980) + regular openings -> Compendio estadistico, proceso de admision año academico 2011, Universidad de Chile, cuadro 3. p. 146.
	
	* For each series: s0_* is the first year, v_* lists one value per consecutive year from s0_*.
	local s0_openings 1967
	local v_openings 15796 17378 17087 20491 38538 45576 47214 42555 41044 34542 33320 34277 32509 32954

	local s0_regopenings 1967
	local v_regopenings 21083 23169 22148 24321 36826 42273 39276 37213 40611 33136 32338 33283 31621 32398

	local s0_open_publicas 1970
	local v_open_publicas 13584 25357 29784 33257 27972 25091 21237 19486 20950 19588

	local s0_open_privadas 1970
	local v_open_privadas 6907 10093 15792 13957 14583 15953 13305 13834 13327 12921

	local s0_paatakers 1967
	local v_paatakers 29678 31011 36042 49244 51550 79059 107818 103219 119504 91445 84806 102494 107274 110508

	local s0_applicants 1967
	local v_applicants 29678 31011 36042 49058 49778 75402 91307 100237 102439 78260 70156 80509 61600 60845

	foreach s in openings regopenings open_publicas open_privadas paatakers applicants{
		gen `s'=.
		local yy=`s0_`s''
		foreach val of local v_`s'{
			replace `s'=`val' if year==`yy'
			local ++yy
		}
	}

	* Keep each series only in thousands (suffix _ths); the raw counts are dropped
	foreach x in openings open_publicas open_privadas regopenings applicants paatakers{
		gen `x'_ths=`x'/1000
		drop `x'
	}
	
	*Openings by field. Source: Las Transformaciones Educacionales Bajo el Regimen Militar, vol 2, Capitulo 11, cuadro 5. pp 341.	
	* Each entry is "field value_1973 value_1980"; open_<field> is missing in all other years.
	local fields
	foreach row in "agriculture 3015 1454" "art_architecture 1837 1182" ///
		"naturalsciences_math 1186 1565" "socialsciences 6947 3437" ///
		"law 820 508" "humanities 972 956" "education 14090 10059" ///
		"technology_engineering 14041 11076" "health 4161 2717" {
		tokenize `row'
		gen open_`1'=`2' if year==1973
		replace open_`1'=`3' if year==1980
		local fields `fields' `1'
	}
		
	sort year
	* Percent change in openings between 1973 and 1980, stored on the 1980 row.
	* The 1973 base is looked up by year rather than by row offset, so the result
	* does not depend on the data having exactly one row per consecutive year.
	tempvar base73
	foreach f of local fields{
		egen `base73'=max(cond(year==1973,open_`f',.))
		gen d_open_`f'=(open_`f'-`base73')/`base73' if year==1980
		replace d_open_`f'=d_open_`f'*100
		drop `base73'
	}

	*Enrollment by field. Source:  Brunner (1984, p.58)
	* Each entry is "field value_1967 value_1973 value_1980"; sh_matr_<field> is missing in all other years.
	foreach row in "agro 5.8 5.7 4.7" "art 5.8 4.4 3.9" "nat 0.7 2.4 3.2" ///
		"soc 15.2 14.9 12.0" "law 5.7 2.4 2.3" "huma 2.8 1.9 3.0" ///
		"educ 28.8 27.9 28.7" "inge 22.1 29.2 30.0" "salud 13.1 11.2 12.2" {
		tokenize `row'
		gen sh_matr_`1'=`2' if year==1967 
		replace sh_matr_`1'=`3' if year==1973 
		replace sh_matr_`1'=`4' if year==1980 
	}

	* Enrollment by father's education. Source: Las Transformaciones Educacionales Bajo el Regimen Militar, vol 2, Capitulo 11, cuadro 11. pp 348.	
	* Each entry is "level value_1976 value_1981".
	foreach row in "prim 16.3 14.4" "sec 56.3 51.9" "tert 27.4 33.7" {
		tokenize `row'
		gen father_`1'=`2' if year==1976
		replace father_`1'=`3' if year==1981
	}
	
	* Avg PAA score by father's occupation. Source: Las Transformaciones Educacionales Bajo el Regimen Militar, vol 2, Capitulo 11, cuadro 13. pp 351.
	* Each entry is "occupation score_1976 score_1981".
	* The 1976 value for "service" is entered as 580; the 612 shown in the source is wrong.
	local occupations
	foreach row in "gerente 637 662" "profesor 647 663" "tecnico 611 632" ///
		"oficina 596 621" "pequeno 589 627" "artesano 591 619" ///
		"urbcali 572 607" "agrcali 541 592" "service 580 602" ///
		"urbncal 559 601" "agrncal 526 579" {
		tokenize `row'
		gen paa_avg_`1'=`2' if year==1976
		replace paa_avg_`1'=`3' if year==1981
		local occupations `occupations' `1'
	}

	* Index of each occupation's average score relative to "profesor" (= 100) in the same year
	foreach occ of local occupations{
		gen i_paa_avg_`occ'=100*paa_avg_`occ'/paa_avg_profesor
	}
	
	*Number of schools. Source: Echeverria (1980, p. 29)
	* Each entry is "level count_1969 count_1973 count_1977". Only the index
	* (1969 = 100) is kept; the raw count is dropped once the index exists.
	tempvar base69
	foreach row in "pre 48 81 119" "bas 7302 8081 8151" "med 717 718 731" {
		tokenize `row'
		gen establ_`1'=`2' if year==1969
		replace establ_`1'=`3' if year==1973
		replace establ_`1'=`4' if year==1977

		egen `base69'=max(cond(year==1969,establ_`1',.))
		gen ind_establ_`1'=100*establ_`1'/`base69'
		drop `base69' establ_`1'
	}
	
	* School meals. Source: Las Transformaciones Educacionales Bajo el Regimen Militar, vol 2, Capitulo 16, cuadro 1. pp 464.
	* Each list holds one percentage per year, in order, for 1965-1980.
	local v_pct_desayuno 38.6 48.3 56.3 60.8 59.7 63.6 64.0 67.8 62.4 57.4 32.4 34.3 47.1 47.2 34.0 34.8
	local v_pct_almuerzo 13.6 20.6 28.2 29.8 29.4 30.3 29.7 31.6 29.1 28.4 25.8 16.1 13.2 13.8 13.2 13.5

	foreach s in pct_desayuno pct_almuerzo{
		gen `s'=.
		local yy=1965
		foreach val of local v_`s'{
			replace `s'=`val' if year==`yy'
			local ++yy
		}
	}

	* Convert percentages to 0-1 shares
	foreach s in pct_desayuno pct_almuerzo{
		replace `s'=`s'/100
	}

	*UNESCO: Enrollment and graduating students. Source: UNESCO statistical yearbooks (1963-1989)
	* Each list holds one value per year, in order, for 1963-1981; "." marks years entered as missing.
	local v_enrollment_unesco 31961 36503 43608 51678 57146 52937 73035 78430 101087 127238 146451 145003 149647 134149 131793 130982 127349 120168 120101
	local v_graduating_unesco 2712 3440 2928 . . . 5971 8255 . . 8922 11128 11900 . 14474 . 17640 15722 20037

	foreach s in enrollment_unesco graduating_unesco{
		gen `s'=.
		local yy=1963
		foreach val of local v_`s'{
			replace `s'=`val' if year==`yy'
			local ++yy
		}
	}

	* Graduates as a share of enrollment in the same year
	gen sh_grad=graduating_unesco/enrollment_unesco
	
	*College graduation rate from the 1992 census: among people with any_college==1,
	*the share with yrs_college>=4, by age_21 (renamed to year for the merge)
	preserve
		use "${int}/censo1992.dta",clear
		keep if any_college==1
		* NOTE(review): a missing yrs_college satisfies >=4 in Stata and is counted as a graduate — confirm none are missing
		gen grad_rate=(yrs_college>=4)
		collapse (mean) grad_rate,by(age_21)
		rename age_21 year
		tempfile grad1992
		save `grad1992'
	restore
	merge 1:1 year using `grad1992',nogen
	
	save "${int}/figures.dta", replace
}

//////////////////
/// INEQUALITY ///
//////////////////
{
	* Builds "${int}/Data_Figure_5.dta": one row per year with the income shares of
	* the bottom quintile (pct_decil1), quintiles 2-4 combined (pct_decil24) and the
	* top quintile (pct_decil5), plus the Gini coefficient for 1964-2009.
	* Requires the user-written command -ineqdeco- (ssc install ineqdeco).
	use "${int}/eod_all.dta",clear

	keep if ingpers<99999999

	* Year-specific income quintiles. The variables keep their "decil" names,
	* but xtile is run with nq(5), so the groups are quintiles.
	gen decil_ingpers=.
	tempvar q_year
	levelsof year,local(yr)
	foreach i of local yr{
		xtile `q_year'=ingpers if year==`i', nq(5)
		replace decil_ingpers=`q_year' if `q_year'!=.&decil_ingpers==.
		drop `q_year'	// one scratch variable is reused instead of adding one per year
	}

	* Share of each year's total income held by each quintile
	egen tot_decil=total(ingpers),by(decil_ingpers year)
	egen tot_year=total(ingpers),by(year)
	gen pct_decil=tot_decil/tot_year

	collapse (mean) pct_decil,by(decil_ingpers year)
	reshape wide pct_decil,i(year) j(decil_ingpers)

	gen pct_decil24= pct_decil2+ pct_decil3+ pct_decil4

	keep pct_decil5 pct_decil24 pct_decil1 year

	* Gini coefficient by year, computed from the microdata
	preserve 
		use "${int}/eod_all.dta",clear

		keep if ingpers<99999999
		keep if year>=1964 & year<=2009 // from the start of Eduardo Frei Montalva's government to the end of Michelle Bachelet's government

		gen gini=.

		levelsof year,local(yr)
		foreach i of local yr{
			ineqdeco ingpers if year==`i'
			replace gini=`r(gini)' if year==`i'
		}

		collapse (mean) gini, by(year)
		* Save to the tempfile name itself: appending ".dta" would create a separate
		* file that Stata does not erase when the do-file ends.
		tempfile gini  
		save `gini'
	restore 
	* Years outside 1964-2009 are kept, with gini missing
	merge 1:1 year using `gini', nogenerate

	compress 
	save "${int}/Data_Figure_5.dta", replace
}
	

	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
